In [19]:
import numpy as np
import pandas as pd
import csv
import plotly.graph_objects as go
import numpy as np
import plotly.express as px

Nacitanie dat

Trieda pre prehľadnosť výpisov v jupyter notebooku.

In [ ]:
class display(object):
    template = """<div style="float: left; padding: 10px;">
    <p style = 'font-family:"Courier New", Courier, monospace'>{0}</p>{1}
    </div>"""
    
    def __init__(self, *a):
        self.args = a
    
    def _repr_html_(self):
        return '\n'.join(self.template.format(a,eval(a)._repr_html_())
                        for a in self.args)
    def __repr__(self):
        return '\n\n'.join(a+'\n'+repr(eval(a))
                          for a in self.args)

Načítanie dát do DataFramu a aplikácia funkcie apply na prepis 0/1 --> zaznacene/nezaznacene

In [4]:
df_height = pd.read_csv('/home/zuzka/Desktop/statistics/import_clevermaps.csv',sep = ',')
df_height['zaznacene'] = df_height['zaznacene'].apply(lambda x: 'zaznacene' if x == 1 else 'nezaznacene')
df_height
Out[4]:
kod vyska_zmerena vyska_odhad_pocty_bytu vyska_odhad_pocty_pater zpusob_vyuziti lng lat vyska_kompletni zaznacene
0 18878776 0.39 NaN NaN 8.0 16.515824 49.240423 8.000000 nezaznacene
1 18880266 0.23 NaN NaN 8.0 16.513492 49.244802 8.000000 nezaznacene
2 18882994 -0.58 NaN NaN 8.0 16.524073 49.238074 8.000000 nezaznacene
3 18883362 0.03 NaN NaN 8.0 16.506399 49.253171 8.000000 nezaznacene
4 19483082 0.01 NaN NaN 18.0 16.551800 49.226919 3.000000 nezaznacene
... ... ... ... ... ... ... ... ... ...
75320 19225253 0.06 9.887923 10.463499 7.0 16.596248 49.158506 9.887923 nezaznacene
75321 19282427 0.39 NaN NaN NaN 16.649653 49.233950 NaN nezaznacene
75322 24662461 0.33 7.899848 8.083549 7.0 16.600669 49.230460 7.899848 nezaznacene
75323 19527454 -0.30 7.899848 8.083549 3.0 16.599936 49.217687 7.899848 nezaznacene
75324 87069229 0.11 7.899848 8.186031 7.0 16.606801 49.233369 7.899848 nezaznacene

75325 rows × 9 columns

Box Plots pre jednotlivé výsledky výšok

In [6]:
x0 = df_height['vyska_odhad_pocty_bytu'].to_list()
x1 = df_height['vyska_odhad_pocty_pater'].to_list()
x2 = df_height['vyska_kompletni'].to_list()

fig = go.Figure()
# Use x instead of y argument for horizontal plot
fig.add_trace(go.Box(x=x0, name='vyska_odhad_pocty_bytu'))
fig.add_trace(go.Box(x=x1,name='vyska_odhad_pocty_pater'))
fig.add_trace(go.Box(x=x2, name ='vyska_kompletni'))

fig.show()
020406080vyska_odhad_pocty_bytuvyska_odhad_pocty_patervyska_kompletni
vyska_odhad_pocty_bytuvyska_odhad_pocty_patervyska_kompletni
In [7]:
charakteristika_patra = df_height['vyska_odhad_pocty_pater'].describe().reset_index()
charakteristika_byty = df_height['vyska_odhad_pocty_bytu'].describe().reset_index()
charakteristika_komplet = df_height['vyska_kompletni'].describe().reset_index()
In [8]:
display('charakteristika_patra', 'charakteristika_byty', 'charakteristika_komplet')
Out[8]:

charakteristika_patra

index vyska_odhad_pocty_pater
0 count 43599.000000
1 mean 10.117817
2 std 4.489932
3 min 3.660000
4 25% 8.083549
5 50% 8.083549
6 75% 10.463499
7 max 38.693889

charakteristika_byty

index vyska_odhad_pocty_bytu
0 count 39172.000000
1 mean 10.445681
2 std 4.574093
3 min 5.390000
4 25% 7.899848
5 50% 7.899848
6 75% 9.887923
7 max 42.130000

charakteristika_komplet

index vyska_kompletni
0 count 71345.000000
1 mean 8.224663
2 std 5.461309
3 min 1.000000
4 25% 4.030000
5 50% 7.810000
6 75% 9.990000
7 max 82.610000

Koľko budov padlo do jednotlivých intervalov pre výšku

In [9]:
df_height['vyska_odhad_pocty_bytu_bins'] = pd.cut(x=df_height['vyska_odhad_pocty_bytu'],
                                                  bins=list(range(2,32,2))+[85])
df_height_hist_byty = df_height.groupby('vyska_odhad_pocty_bytu_bins')['kod'].count().reset_index()
In [10]:
df_height['vyska_odhad_pocty_pater_bins'] = pd.cut(x=df_height['vyska_odhad_pocty_pater'], 
                                                   bins=list(range(2,32,2))+[85])
df_height_hist_patra = df_height.groupby('vyska_odhad_pocty_pater_bins')['kod'].count().reset_index()
In [11]:
df_height['vyska_kompletni_bins'] = pd.cut(x=df_height['vyska_kompletni'], bins=list(range(2,32,2))+[85])
df_height_hist = df_height.groupby('vyska_kompletni_bins')['kod'].count().reset_index()
In [12]:
display('df_height_hist', 'df_height_hist_patra', 'df_height_hist_byty')
Out[12]:

df_height_hist

vyska_kompletni_bins kod
0 (2, 4] 16890
1 (4, 6] 7884
2 (6, 8] 18167
3 (8, 10] 9748
4 (10, 12] 6219
5 (12, 14] 3796
6 (14, 16] 1751
7 (16, 18] 1447
8 (18, 20] 1065
9 (20, 22] 927
10 (22, 24] 726
11 (24, 26] 948
12 (26, 28] 440
13 (28, 30] 137
14 (30, 85] 333

df_height_hist_patra

vyska_odhad_pocty_pater_bins kod
0 (2, 4] 1
1 (4, 6] 0
2 (6, 8] 0
3 (8, 10] 32559
4 (10, 12] 3829
5 (12, 14] 0
6 (14, 16] 2434
7 (16, 18] 1919
8 (18, 20] 10
9 (20, 22] 925
10 (22, 24] 521
11 (24, 26] 745
12 (26, 28] 415
13 (28, 30] 7
14 (30, 85] 234

df_height_hist_byty

vyska_odhad_pocty_bytu_bins kod
0 (2, 4] 0
1 (4, 6] 3
2 (6, 8] 23795
3 (8, 10] 6325
4 (10, 12] 1473
5 (12, 14] 681
6 (14, 16] 877
7 (16, 18] 2557
8 (18, 20] 1555
9 (20, 22] 665
10 (22, 24] 147
11 (24, 26] 856
12 (26, 28] 40
13 (28, 30] 7
14 (30, 85] 191
In [13]:
x_barplot= df_height_hist_patra['vyska_odhad_pocty_pater_bins'].astype('str')
In [14]:
import plotly.graph_objects as go
animals=['vyska_odhad_pocty_bytu', 'vyska_odhad_pocty_pater']

fig = go.Figure(data=[
    go.Bar(name='vyska_odhad_pocty_bytu', x=x_barplot.to_list(), y=df_height_hist_byty['kod'].to_list(),),
    go.Bar(name='vyska_odhad_pocty_pater', x=x_barplot.to_list(), y=df_height_hist_patra['kod'].to_list()),
    go.Bar(name='kompletni_vyska', x=x_barplot.to_list(), y=df_height_hist['kod'].to_list()),
    
])
# Change the bar mode
fig.update_layout(barmode='group')
fig.show()
(2, 4](4, 6](6, 8](8, 10](10, 12](12, 14](14, 16](16, 18](18, 20](20, 22](22, 24](24, 26](26, 28](28, 30](30, 85]05k10k15k20k25k30k
vyska_odhad_pocty_bytuvyska_odhad_pocty_paterkompletni_vyska

Počet budov na počte bytov pri jednom adresnom mieste

In [15]:
bouild_counts = pd.read_csv('/home/zuzka/Desktop/statistics/pocet_bytu_na_adresni_misto.csv',sep = ',') 
bouild_counts
Out[15]:
kod pocet_bytu_na_adresni_misto
0 19367619 2
1 18912427 1
2 18919294 1
3 18936261 2
4 19390131 1
... ... ...
33974 19095490 1
33975 19096089 2
33976 19096836 1
33977 19096895 12
33978 27617637 1

33979 rows × 2 columns

In [16]:
bouild_counts['vyska_odhad_pocty_bytu'] = bouild_counts.kod.map(df_height.set_index('kod')['vyska_odhad_pocty_bytu'])
bouild_counts['interval_pocet_bytov'] = pd.cut(x=bouild_counts['pocet_bytu_na_adresni_misto'], bins=list(range(4,38,4))+[50, 200])
bouild_counts
Out[16]:
kod pocet_bytu_na_adresni_misto vyska_odhad_pocty_bytu interval_pocet_bytov
0 19367619 2 9.887923 NaN
1 18912427 1 7.899848 NaN
2 18919294 1 7.899848 NaN
3 18936261 2 9.887923 NaN
4 19390131 1 7.899848 NaN
... ... ... ... ...
33974 19095490 1 7.899848 NaN
33975 19096089 2 9.887923 NaN
33976 19096836 1 7.899848 NaN
33977 19096895 12 17.499078 (8.0, 12.0]
33978 27617637 1 7.899848 NaN

33979 rows × 4 columns

In [17]:
bouild_counts_bar = bouild_counts.groupby('interval_pocet_bytov').agg({'kod':'count', 'vyska_odhad_pocty_bytu': 'sum'}).reset_index().rename(columns={'kod':'count'})
bouild_counts_bar['average_height'] = bouild_counts_bar['vyska_odhad_pocty_bytu']/bouild_counts_bar['count']
bouild_counts_bar['interval_pocet_bytov'] = bouild_counts_bar['interval_pocet_bytov'].astype('str')

bouild_counts_bar
Out[17]:
interval_pocet_bytov count vyska_odhad_pocty_bytu average_height
0 (4, 8] 1651 25861.13 15.663919
1 (8, 12] 1396 24873.12 17.817421
2 (12, 16] 1000 18449.54 18.449540
3 (16, 20] 562 10841.03 19.290089
4 (20, 24] 556 12973.52 23.333669
5 (24, 28] 203 4654.11 22.926650
6 (28, 32] 391 9472.25 24.225703
7 (32, 36] 45 874.81 19.440222
8 (36, 50] 168 4380.36 26.073571
9 (50, 200] 187 6410.38 34.280107
In [20]:
data = px.data.gapminder()

fig = px.bar(bouild_counts_bar, x='interval_pocet_bytov', y='count',
             hover_data=['average_height'], color='count',
             labels={'interval_pocet_bytov':'Pocet bytov', 'count':'Pocet Objektov' }, height=300)
fig.show()
(4, 8](8, 12](12, 16](16, 20](20, 24](24, 28](28, 32](32, 36](36, 50](50, 200]050010001500
50010001500Pocet ObjektovPocet bytovPocet Objektov
In [21]:
data = px.data.gapminder()

fig = px.bar(bouild_counts_bar, x='interval_pocet_bytov', y='average_height',
             hover_data=['average_height'], color='average_height',
             labels={'interval_pocet_bytov':'Pocet bytov', 'average_height':'Priemerna vyska objektu' }, height=300)
fig.show()
(4, 8](8, 12](12, 16](16, 20](20, 24](24, 28](28, 32](32, 36](36, 50](50, 200]0102030
202530Priemerna vyska objektuPocet bytovPriemerna vyska objektu

Počet objektov na základe počtu pater

In [23]:
floor_counts_polygon = pd.read_csv('/home/zuzka/Desktop/statistics/so_polygon_dwh.csv',sep = ',') 
floor_counts_polygon.columns
Out[23]:
Index(['kod', 'dokonceni', 'dokonceni_rok', 'je_vytah', 'druh_konstrukce',
       'obestaveny_prostor', 'obestaveny_prostor_cat',
       'obestaveny_prostor_order', 'pocet_bytu', 'pocet_bytu_cat',
       'pocet_bytu_order', 'pocet_podlazi', 'pocet_podlazi_cat',
       'pocet_podlazi_order', 'podlahova_plocha', 'podlahova_plocha_cat',
       'podlahova_plocha_order', 'zastavena_plocha', 'zastavena_plocha_cat',
       'zastavena_plocha_order', 'pripoj_el_energie', 'pripoj_kanal_sit',
       'pripoj_plyn', 'pripoj_vodovod', 'typ', 'zpusob_vytapeni',
       'zpusob_vyuziti', 'x_min', 'x_max', 'y_min', 'y_max', 'kod_zsj_d'],
      dtype='object')
In [24]:
floor_counts = floor_counts_polygon[['pocet_podlazi', 'kod']]
floor_counts
Out[24]:
pocet_podlazi kod
0 50 19415982
1 1 18879594
2 0 42492572
3 0 42652812
4 0 45521514
... ... ...
74845 1 19370105
74846 1 19411740
74847 1 19413505
74848 1 19415907
74849 1 19413467

74850 rows × 2 columns

In [25]:
floor_counts['vyska_odhad_pocty_pater'] = floor_counts.kod.map(df_height.set_index('kod')['vyska_odhad_pocty_bytu'])
floor_counts['interval_pocet_pater'] = pd.cut(x=floor_counts['pocet_podlazi'], bins=list(range(0,10,1))+[12, 50])
floor_counts
/home/zuzka/.local/lib/python3.6/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

/home/zuzka/.local/lib/python3.6/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

Out[25]:
pocet_podlazi kod vyska_odhad_pocty_pater interval_pocet_pater
0 50 19415982 NaN (12.0, 50.0]
1 1 18879594 NaN (0.0, 1.0]
2 0 42492572 NaN NaN
3 0 42652812 NaN NaN
4 0 45521514 NaN NaN
... ... ... ... ...
74845 1 19370105 NaN (0.0, 1.0]
74846 1 19411740 NaN (0.0, 1.0]
74847 1 19413505 NaN (0.0, 1.0]
74848 1 19415907 NaN (0.0, 1.0]
74849 1 19413467 NaN (0.0, 1.0]

74850 rows × 4 columns

In [26]:
floor_counts_bar = floor_counts.groupby('interval_pocet_pater').agg({'kod':'count', 'vyska_odhad_pocty_pater': 'sum'}).reset_index().rename(columns={'kod':'count'})
floor_counts_bar['average_height'] = floor_counts_bar['vyska_odhad_pocty_pater']/floor_counts_bar['count']
floor_counts_bar['interval_pocet_pater'] = floor_counts_bar['interval_pocet_pater'].astype('str')

floor_counts_bar
Out[26]:
interval_pocet_pater count vyska_odhad_pocty_pater average_height
0 (0, 1] 26674 198288.895982 7.433789
1 (1, 2] 5440 40412.732586 7.428811
2 (2, 3] 3525 33011.796904 9.365049
3 (3, 4] 2306 32280.483484 13.998475
4 (4, 5] 1852 29966.259807 16.180486
5 (5, 6] 880 14912.212630 16.945696
6 (6, 7] 499 8770.453382 17.576059
7 (7, 8] 732 16551.789356 22.611734
8 (8, 9] 340 7802.075839 22.947282
9 (9, 12] 195 5098.683921 26.147097
10 (12, 50] 146 4315.147493 29.555805
In [27]:
floor_counts_bar= floor_counts_bar.iloc[1:]
data = px.data.gapminder()

fig = px.bar(floor_counts_bar, x='interval_pocet_pater', y='count',
             hover_data=['average_height'], color='count',
             labels={'interval_pocet_pater':'Pocet pater', 'count':'Pocet Objektov' }, height=300)
fig.show()
(1, 2](2, 3](3, 4](4, 5](5, 6](6, 7](7, 8](8, 9](9, 12](12, 50]020004000
20004000Pocet ObjektovPocet paterPocet Objektov
In [28]:
#floor_counts_bar= floor_counts_bar.iloc[1:]
data = px.data.gapminder()

fig = px.bar(floor_counts_bar, x='interval_pocet_pater', y='average_height',
             hover_data=['count'], color='average_height',
             labels={'interval_pocet_pater':'Pocet pater', 'average_height':'Priemerna vyska objektov' }, height=300)
fig.show()
(1, 2](2, 3](3, 4](4, 5](5, 6](6, 7](7, 8](8, 9](9, 12](12, 50]0102030
10152025Priemerna vyska objektovPocet paterPriemerna vyska objektov

Budovy v Brne podľa sposobu využitia

In [30]:
bar_zpusob_vyuziti = floor_counts_polygon.groupby('zpusob_vyuziti')['kod'].count().reset_index().sort_values(by='kod',ascending=False,)
bar_zpusob_vyuziti
Out[30]:
zpusob_vyuziti kod
8 Rodinný dům 21822
1 Garáž 13746
4 Objekt k bydlení 11947
14 Stavba pro rodinnou rekreaci 8288
0 Bytový dům 5044
2 Jiná stavba 3826
3 Nezjištěno 2295
16 Stavba pro výrobu a skladování 1678
10 Stavba občanského vybavení 1545
17 Stavba technického vybavení 1390
6 Objekt občanské vybavenosti 991
20 Zemědělská stavba 515
7 Průmyslový objekt 475
11 Stavba pro administrativu 470
19 Víceúčelová stavba 277
12 Stavba pro dopravu 249
13 Stavba pro obchod 149
18 Stavba ubytovacího zařízení 102
9 Skleník 13
5 Objekt lesního hospodářství 12
21 Zemědělská usedlost 12
15 Stavba pro shromažďování většího počtu osob 4
In [31]:
buildings=bar_zpusob_vyuziti['zpusob_vyuziti'].to_list()
fig = go.Figure([go.Bar(x=buildings, y=bar_zpusob_vyuziti['kod'].to_list())])
fig.show()
Rodinný důmGarážObjekt k bydleníStavba pro rodinnou rekreaciBytový důmJiná stavbaNezjištěnoStavba pro výrobu a skladováníStavba občanského vybaveníStavba technického vybaveníObjekt občanské vybavenostiZemědělská stavbaPrůmyslový objektStavba pro administrativuVíceúčelová stavbaStavba pro dopravuStavba pro obchodStavba ubytovacího zařízeníSkleníkObjekt lesního hospodářstvíZemědělská usedlostStavba pro shromažďování většího počtu osob05k10k15k20k

Analyza odhadov výšky

In [34]:
zaznacene_chyba = df_height[df_height.values  == "zaznacene"]
In [35]:
err_analysis = zaznacene_chyba[['kod', 'vyska_zmerena', 'vyska_odhad_pocty_bytu', 'vyska_odhad_pocty_pater', 'zpusob_vyuziti']]
In [36]:
err_analysis
Out[36]:
kod vyska_zmerena vyska_odhad_pocty_bytu vyska_odhad_pocty_pater zpusob_vyuziti
7 19367619 11.47 9.887923 8.083549 7.0
1140 18883915 11.14 NaN NaN 8.0
2589 18894232 7.35 NaN 8.083549 7.0
3174 18912427 5.85 7.899848 8.083549 7.0
3177 18919294 8.58 7.899848 8.083549 7.0
... ... ... ... ... ...
72232 19096836 9.15 7.899848 8.083549 3.0
72234 19096895 13.68 17.499078 16.999154 3.0
72236 19100566 2.18 NaN NaN 18.0
72238 27617637 3.50 7.899848 NaN 15.0
72240 30615348 2.56 NaN NaN 18.0

55721 rows × 5 columns

In [37]:
err_analysis['diff_zmerana_byty_%'] = abs((err_analysis['vyska_zmerena'] -
                                     err_analysis['vyska_odhad_pocty_bytu'])/(err_analysis['vyska_zmerena']/100))
err_analysis['diff_zmerana_patra_%'] = abs((err_analysis['vyska_zmerena'] - 
                                      err_analysis['vyska_odhad_pocty_pater'])/(err_analysis['vyska_zmerena']/100))
/home/zuzka/.local/lib/python3.6/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

/home/zuzka/.local/lib/python3.6/site-packages/ipykernel_launcher.py:4: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [38]:
err_analysis.groupby('zpusob_vyuziti')['diff_zmerana_patra_%'].sum().reset_index().sort_values('diff_zmerana_patra_%', ascending=True).head()
Out[38]:
zpusob_vyuziti diff_zmerana_patra_%
20 21.0 0.000000
1 2.0 43.165797
3 4.0 51.703530
8 9.0 113.804304
16 17.0 1496.552426
In [39]:
grouped_err = err_analysis.groupby('zpusob_vyuziti').agg({'kod':'count', 'diff_zmerana_byty_%': 'sum',
                                                          'diff_zmerana_patra_%': 'sum' }).reset_index().rename(columns={'kod':'count'})
grouped_err
grouped_err['priemerna_chyba_byty'] = grouped_err['diff_zmerana_byty_%']/grouped_err['count']
grouped_err['priemerna_chyba_patra'] = grouped_err['diff_zmerana_patra_%']/grouped_err['count']
grouped_err
Out[39]:
zpusob_vyuziti count diff_zmerana_byty_% diff_zmerana_patra_% priemerna_chyba_byty priemerna_chyba_patra
0 1.0 351 313.185355 2757.428392 0.892266 7.855921
1 2.0 12 70.284679 43.165797 5.857057 3.597150
2 3.0 7563 193924.235413 203961.329386 25.641179 26.968310
3 4.0 8 44.159732 51.703530 5.519966 6.462941
4 5.0 633 3545.775582 15423.174235 5.601541 24.365204
5 6.0 5502 143996.970475 111978.782877 26.171750 20.352378
6 7.0 20642 559428.830296 583800.967543 27.101484 28.282190
7 8.0 4048 1013.725665 26136.461943 0.250426 6.456636
8 9.0 2 0.000000 113.804304 0.000000 56.902152
9 10.0 119 251.417057 5001.089901 2.112748 42.025966
10 11.0 96 481.452644 3470.835591 5.015132 36.154537
11 12.0 1272 592.000779 14239.075763 0.465409 11.194242
12 13.0 341 220.842534 1909.652244 0.647632 5.600153
13 14.0 439 2158.879956 13372.060170 4.917722 30.460274
14 15.0 1440 8752.832986 38305.452977 6.078356 26.601009
15 16.0 771 228.780379 8619.622853 0.296732 11.179796
16 17.0 158 868.831851 1496.552426 5.498936 9.471851
17 18.0 8499 0.000000 181296.469846 0.000000 21.331506
18 19.0 2639 6969.775128 47615.709550 2.641067 18.043088
19 20.0 252 8041.833164 9639.775285 31.912036 38.253077
20 21.0 2 0.000000 0.000000 0.000000 0.000000
In [40]:
err_analysis['intervaly_vysky_byty'] = pd.cut(x=err_analysis['vyska_odhad_pocty_bytu'],
                                                  bins=list(range(2,32,2))+[85])
df_height_byty = err_analysis.groupby('intervaly_vysky_byty')['kod'].count().reset_index()
df_height_byty
/home/zuzka/.local/lib/python3.6/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

Out[40]:
intervaly_vysky_byty kod
0 (2, 4] 0
1 (4, 6] 1
2 (6, 8] 20595
3 (8, 10] 5439
4 (10, 12] 1240
5 (12, 14] 564
6 (14, 16] 765
7 (16, 18] 2243
8 (18, 20] 1378
9 (20, 22] 597
10 (22, 24] 132
11 (24, 26] 812
12 (26, 28] 33
13 (28, 30] 7
14 (30, 85] 178
In [41]:
err_analysis['intervaly_vysky_patra'] = pd.cut(x=err_analysis['vyska_odhad_pocty_pater'],
                                                  bins=list(range(2,32,2))+[85])
df_height_patra = err_analysis.groupby('intervaly_vysky_patra')['kod'].count().reset_index()
df_height_patra
/home/zuzka/.local/lib/python3.6/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

Out[41]:
intervaly_vysky_patra kod
0 (2, 4] 1
1 (4, 6] 0
2 (6, 8] 0
3 (8, 10] 27394
4 (10, 12] 3203
5 (12, 14] 0
6 (14, 16] 2075
7 (16, 18] 1703
8 (18, 20] 7
9 (20, 22] 841
10 (22, 24] 449
11 (24, 26] 688
12 (26, 28] 389
13 (28, 30] 5
14 (30, 85] 216
In [42]:
err_analysis['intervaly_vysky_komplet'] = pd.cut(x=err_analysis['vyska_zmerena'],
                                                  bins=list(range(2,32,2))+[85])
df_height_komplet= err_analysis.groupby('intervaly_vysky_komplet')['kod'].count().reset_index()
df_height_komplet
/home/zuzka/.local/lib/python3.6/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

Out[42]:
intervaly_vysky_komplet kod
0 (2, 4] 12107
1 (4, 6] 7882
2 (6, 8] 10924
3 (8, 10] 7428
4 (10, 12] 5898
5 (12, 14] 3679
6 (14, 16] 1606
7 (16, 18] 1111
8 (18, 20] 887
9 (20, 22] 846
10 (22, 24] 702
11 (24, 26] 900
12 (26, 28] 430
13 (28, 30] 136
14 (30, 85] 318
In [43]:
x_barplot= df_height_komplet['intervaly_vysky_komplet'].astype('str')
In [44]:
import plotly.graph_objects as go

fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=x_barplot.to_list(),
        y=df_height_komplet['kod'].to_list(), name = 'Vyska zmerana'))
           
fig.add_trace(
    go.Bar(
        x=x_barplot.to_list(),
        y=df_height_patra['kod'].to_list(), name = 'Vysky podla pater'
    ))

fig.show()
(2, 4](4, 6](6, 8](8, 10](10, 12](12, 14](14, 16](16, 18](18, 20](20, 22](22, 24](24, 26](26, 28](28, 30](30, 85]05k10k15k20k25k
Vyska zmeranaVysky podla pater
In [45]:
import plotly.graph_objects as go

fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=x_barplot.to_list(),
        y=df_height_komplet['kod'].to_list(), name = 'Vyska zmerana'))
           
fig.add_trace(
    go.Bar(
        x=x_barplot.to_list(),
        y=df_height_byty['kod'].to_list(), name = 'Vysky podla bytov'
    ))

fig.show()
(2, 4](4, 6](6, 8](8, 10](10, 12](12, 14](14, 16](16, 18](18, 20](20, 22](22, 24](24, 26](26, 28](28, 30](30, 85]05k10k15k20k
Vyska zmeranaVysky podla bytov
In [ ]: